# -*- coding: utf-8 -*-
import os
import re

import scrapy

class ExampleSpider(scrapy.Spider):
    """Crawl 51job search-result pages and save each raw HTML page to disk.

    Fetches the first ``MAX_PAGES`` result pages of a fixed 51job search
    (the query is URL-encoded inside the request URL) and writes every
    response body unmodified to ``./data/list-<page>.html``.
    """

    name = 'example'

    # How many result pages to request (pages are 1-indexed on 51job).
    MAX_PAGES = 50

    def start_requests(self):
        """Yield one request per result page, pages 1..MAX_PAGES."""
        # The page number is the path segment right before ".html".
        url_template = (
            'http://search.51job.com/list/000000,000000,0000,00,9,99,'
            '%25E5%25A4%25A7%25E6%2595%25B0%25E6%258D%25AE%2B%25E4%25BA%25A7%25E5%2593%2581,2,'
            '{page}.html?lang=c&stype=1&postchannel=0000&workyear=99&cotype=99'
            '&degreefrom=99&jobterm=99&companysize=99&lonlat=0%2C0&radius=-1'
            '&ord_field=0&confirmdate=9&fromType=5&dibiaoid=0&address=&line='
            '&specialarea=00&from=&welfare='
        )
        for page in range(1, self.MAX_PAGES + 1):
            yield scrapy.Request(url=url_template.format(page=page),
                                 callback=self.parse)

    def parse(self, response):
        """Persist the raw response body to a local HTML file."""
        filename = self.getPage(response.url)
        # Create ./data/ on first use; open(..., 'wb') would otherwise
        # fail with FileNotFoundError on a fresh checkout.
        os.makedirs(os.path.dirname(filename), exist_ok=True)
        with open(filename, 'wb') as f:
            f.write(response.body)

    def getPage(self, url):
        """Return the output path './data/list-<n>.html' for *url*.

        <n> is the page number extracted from the URL (the digits right
        before '.html'). Falls back to 'unknown' instead of raising when
        the URL carries no page number.
        """
        # Was pageinfo.groups(0): that returns the tuple of ALL groups,
        # not the captured number — group(1) is the intended value.
        match = re.search(r'(\d+)\.html', url)
        page = match.group(1) if match else 'unknown'
        return './data/list-%s.html' % page